# Calculates boostrap standard errors for in-sample and projection
# counterfactuals

# See "do_counterfactuals_insample.py" for the insample calculation
# See "do_counterfactuals_future.py" for the out of sample calculation
# Use "plot_counterfactuals.py" to make Figures 7 and 8

# Jacob Moscona and Karthik Sastry
# This version: last edited on September 28, 2022

##
## Importing libraries and files
##

import numpy as np
import pandas as pd
import os
import sys


wd = '/home/karthik/Dropbox (MIT)/climate_crops/QJE_Submission/Replication/Data/'
os.chdir(wd)


##
## Options
##

load = False ## If true, load the simulations and just report the key numbers

# To do the future counterfactuals, set 
# useFuture = True and select the path and final period
useFuture = False
path = '45'
tf = 2050

# Select what coefficients / model to use
mode = 'baseline'
#mode = 'area_wt'
#mode = 'prices'

## Options that matter for both cases
t0 = 1960 # Initial decade
t1 = 2010 # Final decade

metric = 'gdd' # Name of the metric being used


# Name the output file
fname = 'bootstrap_out_' + mode
if useFuture:
    fname += '_' + path + '_' + str(tf)

    
##
## Pre loading the real data
##

if useFuture: #  If true, use the future data
    
    future_file = 'county_shocks' + '_' + path + '_' + str(tf) + '_2012a.csv'
    dta = pd.read_csv(future_file)
    
    # Merge final areas
    dta_extra = pd.read_csv('county_extra_for_counterfactual.csv')
    dta = dta.merge(dta_extra, on =['CNTY_FIPS','STATE_FIPS'])
    
    
    key_columns = [] # list of key columns
    for y in (t1,tf):
        for x in ('own','loo'):
            key_columns.append(metric + '_' + x + '_' + str(y))
        if mode == 'prices':
            key_columns.append('logprice_own_' + str(y))

    
    key_columns.append('total_area_acres_2017')

else: # Use historical data
    ## Load main data file
    historical_file = 'county_shocks.csv'
    # Historical values of own, leave one out, etc.
    dta = pd.read_csv(historical_file)
    
    key_columns = [] # list of key columns
    for y in (t0,t1):
        for x in ('own','loo'):
            key_columns.append(metric + '_' + x + '_' + str(y))
        if mode == 'prices':
            key_columns.append('logprice_own_' + str(y))


if mode == 'prices': ## Add prices if necessary
    # Set price flag 
    priceFlag = True
    # Add data on prices
    dta_extra = pd.read_csv('county_extra_for_counterfactual.csv')
    dta = dta.merge(dta_extra, on =['CNTY_FIPS','STATE_FIPS'])    
else:
    priceFlag = False

dta_main = dta.copy() # Constant across iterations
dta_main = dta_main.set_index(['STATE_FIPS','CNTY_FIPS'])



##
## Main bootstrap
## 

if not load: # Do the simulation (otherwise load the draws; see below for the else)
        
    ## Load coefficients
    coef_file = 'bootstrap_results/' + mode + '/gdd_panel_' + mode + '_COEFF_1000.xlsx'
    coefs = pd.read_excel(coef_file)
    
    nboot = 1000
    
    out_kahuna = []
    out_damage_noi = []
    out_damage = []
    
    for i in range(1,nboot+1):
        
        ##
        ## Setting coefficients
        ##
        
        
        
        ## GDD, panel, 1950 to 2010
        # 1000 draws version
        fe_file = 'bootstrap_results/' + mode + '/gdd_panel_' + mode + '_FE_' + str(i) + '_1000.xlsx'

        beta = coefs.iloc[0,i-1]
        gamma = coefs.iloc[1,i-1]
        phi = coefs.iloc[2,i-1]
        k = coefs.iloc[3,i-1]
        
        if mode == 'prices':
            priceC = coefs.iloc[3,i-1]
            priceInt = coefs.iloc[4,i-1]
            k = coefs.iloc[5,i-1]
    
            priceFlag = True
    
    
        
        # Values of fixed effect 
        dta_fe = pd.read_excel(fe_file)
        dta_fe.columns = ['year','CNTY_FIPS','STATE_FIPS','area','area_init','ife','tfe']
        
       
        dta = dta_fe.loc[dta_fe.year == t0,['CNTY_FIPS','STATE_FIPS','ife','tfe','area_init']]
        
        if useFuture:
            dta = dta.rename(columns = {'tfe':'tfeold'})
        else:
            dta = dta.rename(columns = {'tfe':'tfe0'})
            
        dta['new_index'] = dta.index
        tfe_file = dta_fe.loc[dta_fe.year == t1,['CNTY_FIPS','STATE_FIPS','tfe']]
        # Merge TFE1
        dta = dta.merge(tfe_file, on =
                        ['CNTY_FIPS','STATE_FIPS'],how='left',copy=False)
        
        if useFuture:
            dta = dta.rename(columns = {'tfe':'tfe0'})
            
            scale = (tf-2010)/50
            dta['tfe1'] = dta['tfe0'] + scale * (dta['tfe0']-dta['tfeold'])

        else:
            dta = dta.rename(columns = {'tfe':'tfe1'})
        
        # Drop duplicates
        dta = dta.drop_duplicates()
        
        # Merge the climate data
        places = list(dta.loc[:,['STATE_FIPS','CNTY_FIPS']].to_records(index=False))
        for c in key_columns:
            dta.loc[:,c] = 0
        dta.loc[:,key_columns] = dta_main.reindex(pd.MultiIndex.from_tuples(places),columns = key_columns).values
        
        if priceFlag:
            price0 = dta['logprice_own_' + str(t0)].values
            price1 = dta['logprice_own_' + str(t1)].values
    
        
        rho_i = dta['ife'].values
        alpha = [[],[]]
        alpha[0] = dta['tfe0'].values+k
        alpha[1] = dta['tfe1'].values+k
        
        if useFuture:
            area = dta.total_area_acres_2017.values
        else:   
            area = dta.area_init.values
        
            
        ##
        ## Main function
        ##
            
        def fitted_values(own,loo,interact,alpha,rho):
            return rho + alpha + beta * own + gamma * loo + phi * (interact)
            
        def calculate_land_values(own0,loo0,own1,loo1,rho_i,offset=0):
            
            # Calculates the fitted land value in each county
            # Plus a counterfactual that holds constant loo for the calculation of 
            # interact in the future period
            
            fit0 = fitted_values(own0,loo0,own0*loo0,alpha[0],rho_i)
            fit1 = fitted_values(own1,loo1,own1*loo1,alpha[1],rho_i)
            
            int_noi = own1 * (loo0 + offset) # counterfactual eco interact
            fit_noi = fitted_values(own1,loo1,int_noi,alpha[1],rho_i) # no innovation
            
            fit_nocc = fitted_values(own0,loo0,own0*loo0,alpha[1],rho_i)
            
            if priceFlag: # Add price corrections
                fit0 += priceC * price0 + priceInt * (price0 * own0)
                fit1 += priceC * price1 + priceInt * (price1 * own1)
                fit_noi += priceC * price1 + priceInt * (price1 * own1)
                fit_nocc += priceC * price1 + priceInt * (price1 * own0)
                
                
            return(fit0,fit1,fit_noi,fit_nocc)
            
        ##
        ## Calculation
        ##
        
        if useFuture:
            # Outliers
            dta[metric + '_own_' + str(t1)] = np.minimum(dta[metric + '_own_' + str(t1)],15000)
            dta[metric + '_own_' + str(tf)] = np.minimum(dta[metric + '_own_' + str(tf)],15000)

            own0 = dta[metric + '_own_' + str(t1)]
            loo0 = dta[metric + '_loo_' + str(t1)]     
            
            own1 = dta[metric + '_own_' + str(tf)]
            loo1 = dta[metric + '_loo_' + str(tf)]
            


            
        else:
            own0 = dta[metric + '_own_' + str(t0)]
            loo0 = dta[metric + '_loo_' + str(t0)]     
            
            own1 = dta[metric + '_own_' + str(t1)]
            loo1 = dta[metric + '_loo_' + str(t1)]
        
        # Treating the zero as zero; offset = 0
        fit0,fit1,fit_noi,fit_nocc = calculate_land_values(
                own0,loo0,own1,loo1,
                    rho_i,
                    offset = 0
                )
        
        ##
        ## Total summary
        ##
        
        def logmean(x):
            return np.log(np.nanmean(np.exp(x))) 
            
        
        
        awt = area
        loss_with_innovation = np.nansum(awt* np.exp(fit_nocc) - awt* np.exp(fit1))
        loss_without_innovation = np.nansum(awt* np.exp(fit_nocc) - awt* np.exp(fit_noi))
        
        innovation_saved = loss_without_innovation - loss_with_innovation
        big_kahuna = 100 * innovation_saved / loss_without_innovation
        
        percent_damage = 100 * loss_with_innovation / np.nansum(awt* np.exp(fit_nocc))
        percent_damage_noi = 100 * loss_without_innovation / np.nansum(awt* np.exp(fit_nocc))
        
        
        total_val = np.nansum(awt* np.exp(fit1))
        noi_val = np.nansum(awt* np.exp(fit_noi))
        nocc_val = np.nansum(awt* np.exp(fit_nocc))
        
        
        ##
        ## Save everything
        ##
        
        out_kahuna.append(big_kahuna)
        out_damage.append(percent_damage)
        out_damage_noi.append(percent_damage_noi)
        
        print('Done with ' + str(i))
        
    ## Summarize results
    CI = 0.95
    alpha = (1-CI)/2
    
    out_kahuna = np.array(out_kahuna)
    out_damage = np.array(out_damage)
    out_damage_noi = np.array(out_damage_noi)
    
    ## Save
    bootstrap_results = pd.DataFrame(index=range(1,nboot+1),
                                     data = {'kahuna':out_kahuna,
                                             'damage':out_damage,
                                             'damage_noi':out_damage_noi})
    # bootstrap_results.to_csv(fname + '.csv')
else: ## Load the results
    
    bootstrap_results = pd.read_csv(fname + '.csv')
    

print("Mitigation SE:")
print(bootstrap_results['kahuna'].std())

print("Damage SE:")
print(bootstrap_results['damage'].std())

print("Damage no I SE:")
print(bootstrap_results['damage_noi'].std())


